---
title: "Compilation and Analysis Code for: Combining Bottom-Up Monitoring and Top-Down Accountability: A Field Experiment on Managing Corruption in Uganda"
date: "10/17/2019"
output: 
  html_document:
    theme: united
    toc: true
---

## Setup

```{r rmd-setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
library(stargazer)
library(estimatr)
library(dplyr)
library(lfe)
library(tidyr)

simn <- 10000 #number of RI runs, decrease significantly for fast compilation
```

```{r data-read}

audit.v <- read.csv("./Audits_Village_Analysis.csv", stringsAsFactors=FALSE)
audit.c <- read.csv("./Audits_Component_Analysis.csv", stringsAsFactors=FALSE)

survey <- read.csv("./survey_anon.csv")
#note: respodent_name and mobile_phone replaced with unique random values to protect subject anonymity

village.sample <- read.csv("./Village_Sample_MASTER.csv", stringsAsFactors=FALSE)
#note: the village id's come from the master revenue sharing list from UWA.

p2.assigned <- read.csv("./p2samp.csv", stringsAsFactors=FALSE)[,-1] 
#note: this is a modification of "loc.master" with random assignments as "treat2"

p2.ri <- read.csv("./p2_ri.csv", stringsAsFactors=FALSE)[,-1] 
#note: this is a file based on "loc.master" with 10000 random assignment draws

vlist.ordered.dta <- read.csv("./BwindiVillageOrder_190201_JN.csv", stringsAsFactors=FALSE)
#note: this is the list of villages in contiguous order

```

```{r analysis-functions}

se.boot <- function(sims=simn,seed=201,vector){
  store <- rep(NA,sims)
  set.seed(seed)
  for (i in 1:sims){
    store[i] <- mean(sample(vector, replace=T))
  }
  return(sd(store))
}


se.boot.cluster <- function(sims=simn,seed=201,vector,clusters){
  store <- rep(NA,sims)
  set.seed(seed)

  v.OK <- !is.na(vector)
  vector <- vector[v.OK]
  clusters <- clusters[v.OK]
  
  clusters_index <- unique(clusters)
  
  for (i in 1:sims){
    sam_clus <- sample(clusters_index, length(clusters_index), replace=TRUE)
    boot_dat <- NULL
    
    for(j in 1:length(sam_clus)){
      cc <- vector[clusters==sam_clus[j]]
      boot_dat <- c(boot_dat, cc)
    }
    
    store[i] <- mean(boot_dat)
  }
  
  return(sd(store))
}


##Function: randomization inference for difference in means
ri.diff <- function(sims, ri.obj, outcome.var, treat.var, treat.ri.var, info.cols){
  ate <- mean(ri.obj[ri.obj[,treat.var]==1,outcome.var], na.rm=TRUE) - mean(ri.obj[ri.obj[,treat.var]==0,outcome.var], na.rm=TRUE)
  store <- rep(NA,sims)
  
  for (i in 1:sims){
    ri.obj[,treat.ri.var] <- ri.obj[,info.cols+i]
    store[i] <- mean(ri.obj[ri.obj[,treat.ri.var]==1,outcome.var], na.rm=TRUE) - mean(ri.obj[ri.obj[,treat.ri.var]==0,outcome.var], na.rm=TRUE)
  }
  
  outcome <- outcome.var
  se.null <- sd(store)
  p.two.way <- sum(abs(ate)<abs(store))/sims
  p.one.way.greater <- sum(ate<=store)/sims
  p.one.way.lesser <- sum(ate>=store)/sims
  
  out <- list("outcome" = outcome, "ate" = ate, "rand.dist" = store, "se.null"=se.null, "p.two.way" = p.two.way, "p.one.way.greater" = p.one.way.greater, "p.one.way.lesser" = p.one.way.lesser)
  return(out)
}


##Function: randomization inference for felm()
felm.ri <- function(formula, dta, treat.var, rand.ob, rand.ob.info.cols, sims, ...){
  require(lfe)
#1. data and rand.ob must have rows organized in identical order")
#2. treat.var should be first right-side entry in formula")
#3. join.var must have identical name between data and rand.ob")
  
  ate <- coef(felm(formula, data=dta))[1]
  N <- felm(formula, data=dta)$N
  ate.samp.dist <- rep(NA,sims)
  
  for (i in 1:sims){
    dta[,treat.var] <- rand.ob[,rand.ob.info.cols+i]
    
    ate.samp.dist[i] <- coef(felm(formula, data=dta))[1]
  }
  
  p.two.way <- sum(abs(ate)<abs(ate.samp.dist))/sims
  p.one.way.greater <- sum(ate<ate.samp.dist)/sims
  p.one.way.lesser <- sum(ate>ate.samp.dist)/sims
  se.null <- sd(ate.samp.dist)
  
  coun <- list("ate" = ate, "ate.samp.dist" = ate.samp.dist, "se.null"=se.null, "p.two.way" = p.two.way, "p.one.way.greater" = p.one.way.greater, "p.one.way.lesser" = p.one.way.lesser, "N" = N)
  return(coun)
}

```

```{r table-functions}

coef.print <- function(vector){
  sprintf("%.3f",round(vector,3))
}

coef.print2 <- function(vector){
  sprintf("%.2f",round(vector,2))
}

se.print <- function(vector){
  paste("(",sprintf("%.3f",round(vector,3)),")",sep="")
}

se.print2 <- function(vector){
  paste("(",sprintf("%.2f",round(vector,2)),")",sep="")
}

p.print <- function(vector){
  paste("p=",sprintf("%.3f", round(vector,3)),sep="")
}

tab_cell <- function(ri_obj){
  return(tribble(~h,coef.print(ri_obj$ate),se.print(ri_obj$se.null), p.print(ri_obj$p.one.way.greater)))
}

tab_cell_desc <- function(desc,desc.se,i){
  return(tribble(~d,coef.print(desc[i]),se.print(desc.se[i]),""))
}

empty_cell <- tribble(~h, "--", "(--)", "")

```

## Data cleaning and compilation

```{r audit-data-clean}
###Location ID compile and cleaning -----

village.sample.no.dup <- village.sample[!(duplicated(village.sample$village.name) | duplicated(village.sample$village.name, fromLast = TRUE)),]
#Note: will have to work on all duplicate villages manually

p2.assigned$village.id <- NA
for (i in 1:nrow(p2.assigned)){
  p2.assigned$village.id[i] <- ifelse((sum(p2.assigned$village.final[i]==p2.assigned$village.final)==1 & p2.assigned$village.final[i] %in% village.sample.no.dup$village.name),
                                      village.sample.no.dup$village.id[p2.assigned$village.final[i]==village.sample.no.dup$village.name],
                                      NA)
}

#Filling in the other village.id's manually using fuzzy matches from village.sample
p2.assigned$village.id <- ifelse(p2.assigned$location.id.cleaned=="Rubanda.Ruhija.Kashekyera.Bitanwa", 62, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Bugoro" & !is.na(p2.assigned$treat2), 48, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Buhoma", 10, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Ishaya", 49, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kajumo" & !is.na(p2.assigned$treat2), 35, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kanyabuhama", 46, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kanyashogi", 17, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kigaga", 25, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kijuma", 47, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kitahurira (Hakikome)", 20, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kitahurira (Kashasha)", 70, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kyogo (Kanungu)", 27, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Kyogo (Rubanda)", 59, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Mburameizi", 53, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Ndego", 74, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Nyakabungo (Bushura)", 36, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Nyakabungo (Kashekyera)", 60, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Nyakabungo (Rubimbwa)", 40, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Nyamabale", 77, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Nyamatsinda", 89, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Omumbuga" & !is.na(p2.assigned$treat2), 45, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Rushaga", 86, p2.assigned$village.id)
p2.assigned$village.id <- ifelse(p2.assigned$village.final=="Rwensanziro", 50, p2.assigned$village.id)

#Village id's not in sample frame used for assignment: 
#26. Bushegenyi (Ngaara) -- previously indicated as excluded from revenue-sharing
#83. Higabiro (Rubuguri) -- not part of sample frame
#Note: MB sent an email to JN about this on 1/12/18

#Copied identical cleaning for RI object from "p2.assigned" above
p2.ri$village.id <- NA
p2.ri <- p2.ri[,c(1:18,10019,19:10018)] #moving village.id into preliminary columns

for (i in 1:nrow(p2.ri)){
  p2.ri$village.id[i] <- ifelse((sum(p2.ri$village.final[i]==p2.ri$village.final)==1 & p2.ri$village.final[i] %in% village.sample.no.dup$village.name),
                                      village.sample.no.dup$village.id[p2.ri$village.final[i]==village.sample.no.dup$village.name],
                                      NA)
}

#Filling in the other village.id's manually using fuzzy matches from village.sample
#Note: have to use !is.na() from p2.assigned on duplicated; checked that final village names perfectly match
p2.ri$village.id <- ifelse(p2.ri$location.id.cleaned=="Rubanda.Ruhija.Kashekyera.Bitanwa", 62, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Bugoro" & !is.na(p2.assigned$treat2), 48, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Buhoma", 10, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Ishaya", 49, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kajumo" & !is.na(p2.assigned$treat2), 35, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kanyabuhama", 46, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kanyashogi", 17, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kigaga", 25, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kijuma", 47, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kitahurira (Hakikome)", 20, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kitahurira (Kashasha)", 70, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kyogo (Kanungu)", 27, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Kyogo (Rubanda)", 59, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Mburameizi", 53, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Ndego", 74, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Nyakabungo (Bushura)", 36, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Nyakabungo (Kashekyera)", 60, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Nyakabungo (Rubimbwa)", 40, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Nyamabale", 77, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Nyamatsinda", 89, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Omumbuga" & !is.na(p2.assigned$treat2), 45, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Rushaga", 86, p2.ri$village.id)
p2.ri$village.id <- ifelse(p2.ri$village.final=="Rwensanziro", 50, p2.ri$village.id)


###Data Cleaning -----

##Adding village ids (per field sheet of projects)

audit.v$village.id <- NA
audit.v <- audit.v[,c(1:5,27,6:26)] #Reording columns to put "village.id" next to "village"

#Loop for unique matches
for (i in 1:nrow(audit.v)){
  audit.v$village.id[i] <- ifelse((sum(audit.v$village[i]==village.sample$village.name)==1 & audit.v$village[i] %in% village.sample.no.dup$village.name),
                                      village.sample.no.dup$village.id[audit.v$village[i]==village.sample.no.dup$village.name],
                                      NA)
}

#Filling in the other village.id's manually using fuzzy matches with village.sample
audit.v$village.id <- ifelse(audit.v$village=="Buhoma", 10, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Kanyabuhama", 46, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Ruboona", 12, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Nyakabungo ( Rubibwa )", 40, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Nyakabungo bushura", 36, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Ishaya ( Mucusye )", 49, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Bugolo", 48, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Kanyashogi", 17, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Kyogo (mpungu )", 27, audit.v$village.id) #Mpungu is the sub-county
audit.v$village.id <- ifelse(audit.v$village=="Mburameizi", 53, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Kyogo rubanda", 59, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Kigaga", 25, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Nyakabungo kashekyera", 60, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Ndengo", 74, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Nyambare", 77, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Rushaga", 86, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Rwensanziro", 50, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Nyakabungo ( Kashekyera )", 60, audit.v$village.id)
audit.v$village.id <- ifelse(audit.v$village=="Kitahurira", 20, audit.v$village.id) #Matched using project components from "audit.c"
audit.v$village.id <- ifelse(audit.v$village=="Kyitahurira", 70, audit.v$village.id) #Matched using project components from "audit.c"
#Checked: all villages in sample frame are accounted for and have a village.id

#Adding treatment assignment for accountability phase
p2.assigned <- p2.assigned[!is.na(p2.assigned$village.id),]
for (i in 1:nrow(audit.v)){
  audit.v$treat[i] <- ifelse(length(p2.assigned$treat2[p2.assigned$village.id==audit.v$village.id[i]])>0,
                             p2.assigned$treat2[p2.assigned$village.id==audit.v$village.id[i]], NA)
}
#This brings in the treatment assigned for the accountability phase, which should allow descriptive statistics to be computed

#Pulls treatment, village ID, district, parish, and subcounty information into the audit.c file
audit.c$treat <- NA
audit.c$village_id <- NA
audit.c$district <- NA
audit.c$subcounty <- NA
audit.c$parish <- NA

for (i in 1:nrow(audit.c)) {audit.c$treat[i] <- audit.v$treat[audit.c$X_parent_index[i]]}
for (i in 1:nrow(audit.c)) {audit.c$village_id[i] <- audit.v$village.id[audit.c$X_parent_index[i]]}

for (i in 1:nrow(audit.c)) {
  for (j in 1:nrow(p2.assigned)) {
    if (audit.c$village_id[i] == p2.assigned$village.id[j]) {
      audit.c$district[i] <- p2.assigned$district.orig[j]; 
      audit.c$subcounty[i] <- p2.assigned$subcounty.orig[j]; 
      audit.c$parish[i] <- p2.assigned$parish.orig[j];
      audit.c$sc.block[i] <- p2.assigned$sc.block[j]
    }
  }
} #Note: loop involves overwriting j many times, but works quickly
#Note: NAs caused by measurements taken outside of sample frame
#Note: confirmed that no slight mispellings in district, subcounty, or parish strings

#Creates dummy variable for whether the component is PAM (1 if it is PAM)
audit.c$PAM <- NA

for (i in 1:nrow(audit.c)) {
  ifelse(audit.c$PAM_part[i] == "yes", audit.c$PAM[i] <- 1, audit.c$PAM[i] <- 0)
}

###Combining audits of same villages separated
#Note: using audit.c as the main file for analysis, it is not necessary to merge separate audit.v entries (other than for later analysis of information source)


###Removing training or incorrect entries
audit.c <- audit.c[,c(69,68,1:67,70:74)] #Re-arranging columns for convenience, moving village_id to front

#Creating objects to code shared projects for later exclusion/analysis
audit.c$shared <- 0
audit.c$shared.ids <- NA
audit.c <- audit.c[,c(1:3,75:76,4:74)] #Re-arranging columns for convenience, moving up shared variables


#Manually noting villages that have components that do not exactly match (https://docs.google.com/document/d/1li_seH-D1vbxm7awX7Cuvlhya6nOQnfwonc6qZktwbU/edit?usp=sharing):
#1-6: share non-PAM project, which means non-independent outcomes
audit.c$shared <- ifelse(audit.c$village_id %in% 1:6 & (audit.c$Description_of_Project_Part=="Community camp" | audit.c$Description_of_Project_Part=="Construction of  community  camp (shared with the Parish)"), 1, 0)
audit.c$shared.ids <- ifelse(audit.c$village_id %in% 1:6 & (audit.c$Description_of_Project_Part=="Community camp" | audit.c$Description_of_Project_Part=="Construction of  community  camp (shared with the Parish)"),
                             "1,2,3,4,5,6",audit.c$shared.ids)

#7,10: problems w/ approved vs. not approved data, extra road
#(2/6): inquiry sent to Jeremiah about extra road --> the road is shared with village 10
audit.c$shared <- ifelse(audit.c$village_id %in% c(7,10) & (audit.c$Description_of_Project_Part=="Construction  of Nyabitsika/Kabumba road shared with Nkwenda  village" | audit.c$Description_of_Project_Part=="Opening of community feeder road (nyabisiika kabumba"), 1, audit.c$shared)
audit.c$shared.ids <- ifelse(audit.c$village_id %in% c(7,10) & (audit.c$Description_of_Project_Part=="Construction  of Nyabitsika/Kabumba road shared with Nkwenda  village" | audit.c$Description_of_Project_Part=="Opening of community feeder road (nyabisiika kabumba"),
                             "7,10",audit.c$shared.ids)

#10: PAM component duplicated
grab <- audit.c[audit.c$X_index==15,c("Picture_of_PAM_work_3","Description_of_PAM_work_3")]
audit.c[audit.c$X_index==14,c("Picture_of_PAM_work_2","Description_of_PAM_work_2")] <- grab #From X_index==15
audit.c[audit.c$X_index==14,"reasons_evidence_lacking.poor_no_labels"] <- 1 #From X_index==15
audit.c <- audit.c[!audit.c$X_index==15,] #removing X_index==15

grab <- audit.c[audit.c$X_index==16,c("Picture4","Description_of_Picture4")] #From X_index==16
audit.c[audit.c$X_index==14,c("Picture3","Description_of_Picture3")] <- grab #From X_index==16
audit.c <- audit.c[!audit.c$X_index==16,] #removing X_index==16

#11-12: shared health center, which means non-independent outcomes
audit.c$shared <- ifelse(audit.c$village_id==11 & audit.c$Description_of_Project_Part=="Health centre construction",1,audit.c$shared)
audit.c$shared <- ifelse(audit.c$village_id==12 & audit.c$Description_of_Project_Part=="Community health centre",1,audit.c$shared)
audit.c$shared.ids <- ifelse(audit.c$village_id %in% 11:12 & (audit.c$Description_of_Project_Part=="Health centre construction" | audit.c$Description_of_Project_Part=="Community health centre"),
                             "11,12",audit.c$shared.ids)

#12: PAM component duplicated with erroneous entry
audit.c <- audit.c[!(audit.c$village_id==12 & audit.c$Description_of_Project_Part=="None"),]

#13: one erroneous entry
audit.c <- audit.c[!(audit.c$village_id==13 & audit.c$Description_of_Project_Part=="No project part (just an error)"),]

#14: many PAM / goat duplicates
# duplicate entries are exact copies
audit.c <- audit.c[!audit.c$X_index==45,] #removing X_index==45
audit.c <- audit.c[!audit.c$X_index==46,] #removing X_index==46
audit.c <- audit.c[!audit.c$X_index==49,] #removing X_index==49
audit.c <- audit.c[!audit.c$X_index==50,] #removing X_index==50

#20-24: shared community camp, which means non-independent outcomes
audit.c$shared <- ifelse(audit.c$village_id %in% 20:24 & (audit.c$Description_of_Project_Part=="Construction of Buremba community tourist center" | audit.c$Description_of_Project_Part=="Community  camp" | audit.c$Description_of_Project_Part=="Construction of  Buremba community  tourism centre" | audit.c$Description_of_Project_Part=="Construction of Buremba community tourism center" | audit.c$Description_of_Project_Part=="Construction of bureau community tourism centre"), 1, audit.c$shared)
audit.c$shared.ids <- ifelse(audit.c$village_id %in% 20:24 & (audit.c$Description_of_Project_Part=="Construction of Buremba community tourist center" | audit.c$Description_of_Project_Part=="Community  camp" | audit.c$Description_of_Project_Part=="Construction of  Buremba community  tourism centre" | audit.c$Description_of_Project_Part=="Construction of Buremba community tourism center" | audit.c$Description_of_Project_Part=="Construction of bureau community tourism centre"),
                             "20,21,22,23,24",audit.c$shared.ids)

#26: village not part of sample frame (Bushegenyi)
audit.c <- audit.c[!(audit.c$village_id==26),]

#28-29: shared road, which means non-independent outcomes
audit.c$shared <- ifelse(audit.c$village_id==28 & audit.c$Description_of_Project_Part=="Opening karambi kyambeya,feeder road",1,audit.c$shared)
audit.c$shared <- ifelse(audit.c$village_id==29 & audit.c$Description_of_Project_Part=="Opening of karambi-kibingo community road",1,audit.c$shared)
audit.c$shared.ids <- ifelse(audit.c$village_id %in% 28:29 & (audit.c$Description_of_Project_Part=="Opening karambi kyambeya,feeder road" | audit.c$Description_of_Project_Part=="Opening of karambi-kibingo community road"),
                             "28,29",audit.c$shared.ids)

#33: duplicate PAM and goat entries
# duplicate uploads that are exact copies
audit.c <- audit.c[!(audit.c$village_id==33 & audit.c$X_parent_index==29),]

#41: duplicate PAM entries
grab <- audit.c[audit.c$X_index==38,c("Picture9","Description_of_Picture9","Picture10","Description_of_Picture10")] #From X_index==38
audit.c[audit.c$X_index==42,c("Picture9","Description_of_Picture9","Picture10","Description_of_Picture10")] <- grab #From X_index==38
audit.c[audit.c$X_index==42,c("dispersed_num_shown")] <- 10 #New value of PAM observations (from X_index==38)
audit.c <- audit.c[!audit.c$X_index==38,] #removing X_index==38

#42: duplicate PAM entries
# third component (X_index==84) is noted as an error in audit.v
audit.c <- audit.c[!audit.c$X_index==84,] #removing X_index==84

#48-49: potentially shared school, check with Jeremiah (2/5/18 Slack: confirmed by Jeremiah as shared)
audit.c$shared <- ifelse(audit.c$village_id %in% 48:49 & (audit.c$Description_of_Project_Part=="Construction of two class rooms" | audit.c$Description_of_Project_Part=="Completion of a two classroom block"), 1, audit.c$shared)
audit.c$shared.ids <- ifelse(audit.c$village_id %in% 48:49 & (audit.c$Description_of_Project_Part=="Construction of two class rooms" | audit.c$Description_of_Project_Part=="Completion of a two classroom block"),
                             "48,49",audit.c$shared.ids)

#57! audit of PAM component missing (Slack 2/5/18: the PAM component was never implemented in this village)
#Note: adding missing PAM component manually to downloaded file, per Jeremiah response
add57 <- c(57,0,
           "PAM",0,NA,
           "yes","approved","not_start_not_","dispersed","OK","OK","",NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,
           "","","","","","","","","","","","","","","","","","","","",
           "",0,"not_ver","not_delivered",0,0,0,0,0,0,0,1,
           "PAM for the whole of Ruhija Sub county was never implemented due to corruption at the subcounty. There was change of sub county chief after the 1st one being suspended and the new one only finding the procurement process finished. It is alleged that the former sub county chief never purchased any PAM item and kept the money",
           "","","","","","","OK",197,"Bwindi Accountability Endline LC/PMC physical audit (Final)",70,
           "Kabale","Ruhija","Kiyebe",1)
audit.c <- rbind(audit.c,add57)

#83: not measured correctly (doesn't matter because not in sample)
audit.c <- audit.c[!(audit.c$village_id==83),]

#87: sheep rearing duplicated
# removing unreliable audit that Jeremiah decided need to be redone
audit.c <- audit.c[!(audit.c$village_id==87 & audit.c$X_parent_index==92),]

#90: extra entry for PAM, which was not part of proposal
## Enumerator error, the extra entry doesn't correspond to a PAM project, just duplicates some of the info on the school
audit.c <- audit.c[!(audit.c$X_index==170),] #removing X_index==170

## Cross-tabulation of shared components
table(audit.c$treat,audit.c$shared)
prop.table(table(audit.c$treat,audit.c$shared),1)
chisq.test(table(audit.c$treat,audit.c$shared))



#Checking all "not_approved" entries against master RS list
## X_index == 2: shared road, not approved for Nkwenda, but was approved for Buhoma Central
## X_index == 71: Goat rearing was not approved
## X_index == 87: PAM was not approved
## X_index == 101: PAM was approved
## X_index == 123: Goat rearing was approved
## X_index == 166: Sheep rearing was approved
## X_index == 167: PAM was approved
## X_index == 180: Goat rearing was not approved
## X_index == 183: Sheep rearing was not approved, it was implemented instead of the approved goat rearing project
## X_index == 188: gravity water taps were approved, but never implemented. Money was instead used on the road.
## Fixing projects that were incorrectly labeled as not approved:
audit.c$project_part_approved <- ifelse(audit.c$X_index == 101 | audit.c$X_index == 123 | audit.c$X_index == 166 | audit.c$X_index == 167 | audit.c$X_index == 188, "approved", audit.c$project_part_approved)


###Compiling/Cleaning outcome data -----

#Note: there are many "dispersed" projects marked "not_dispersed", but not the other way
#Note: recoding "dispersed" projects marked incorrectly
#sub.c.nd <- subset(audit.c, shared==0 & dispersed_status=="not_dispersed")
#sub.c.d <- subset(audit.c, shared==0 & dispersed_status=="dispersed")
#see <- sub.c.nd[,c(1:3,69)] #convenience object to determine miscoded components
#Note: X=index=c(175,182) these are not_dispersed red chili components, OK.
nd2d <- c(127,132,184:187,189:191) #these are X_index values incorrect marked as "not_dispersed"
audit.c$dispersed_status <- ifelse(audit.c$X_index %in% nd2d, "dispersed", audit.c$dispersed_status)

sub.c.nd <- subset(audit.c, shared==0 & dispersed_status=="not_dispersed")
sub.c.d <- subset(audit.c, shared==0 & dispersed_status=="dispersed")

audit.c$Verifiable <- ifelse(audit.c$X_index==79, "somewhat_ver", audit.c$Verifiable) #Filling in missing value based on pictures and description

#Checking on missing data for "dispersed_num_shown"
#see <- audit.c[is.na(audit.c$dispersed_num_shown),] #convenience object for cleaning
#see2 <- audit.c[!is.na(audit.c$dispersed_num_shown),] #convenience object for cleaning

#Using presense of pictures to recode these values within dispersed / approved project subset
sub.c.d$pics <- 0
sub.c.d$pics <- ifelse(sub.c.d$Picture1=="",sub.c.d$pics,1)
sub.c.d$pics <- ifelse(sub.c.d$Picture2=="",sub.c.d$pics,2)
sub.c.d$pics <- ifelse(sub.c.d$Picture3=="",sub.c.d$pics,3)
sub.c.d$pics <- ifelse(sub.c.d$Picture4=="",sub.c.d$pics,4)
sub.c.d$pics <- ifelse(sub.c.d$Picture5=="",sub.c.d$pics,5)
sub.c.d$pics <- ifelse(sub.c.d$Picture6=="",sub.c.d$pics,6)
sub.c.d$pics <- ifelse(sub.c.d$Picture7=="",sub.c.d$pics,7)
sub.c.d$pics <- ifelse(sub.c.d$Picture8=="",sub.c.d$pics,8)
sub.c.d$pics <- ifelse(sub.c.d$Picture9=="",sub.c.d$pics,9)
sub.c.d$pics <- ifelse(sub.c.d$Picture10=="",sub.c.d$pics,10)
table(sub.c.d$treat,sub.c.d$pics)

table(sub.c.d$pics, sub.c.d$dispersed_num_shown) #There is a small amount of mismatch between number of pictures and number reported by enumerators
check <- subset(sub.c.d, pics!=dispersed_num_shown)

#X_index values checked in object "check"
#11: mismatch due to duplicate components merged
sub.c.d$pics[sub.c.d$X_index==111] <- 0 #The project shown is not the project component listed (it is an Irish potatoe garden planted w/ money from RS)
sub.c.d$pics[sub.c.d$X_index==141] <- 1 #Only one set of PAM items is captures in photographs; other photographs are PAM work completed, which should be captured separately; list of items is not an item observed
#151: "pics" value is correct; enumerator entered wrong value for "dispersed_num_shown"
#180: "pics" value is correct; enumerator entered wrong value for "dispersed_num_shown"

#Note: non-missing data for "dispersed_num_shown" have been superceded by the "pics" variable that is cleaned for actual number of pictures recorded

audit.c$pics <- NA
for (i in 1:nrow(audit.c)){
  sub <- subset(sub.c.d, X_index==audit.c$X_index[i])
  
  if (nrow(sub) > 0){
    audit.c$pics[i] <- sub$pics[1]
  }
} #Checked: merged correctly


###Merging RI object into audit.c for ease of use -----

p2.ri <- p2.ri[!is.na(p2.ri$village.id) & (p2.ri$village.id %in% audit.c$village_id),]
names(p2.ri)[5] <- "treat.ph1"

audit.c.ri <- merge(audit.c, p2.ri, by.x="village_id", by.y="village.id", all.x=TRUE, all.y=FALSE)
#Checked: number of rows does not change
#To Do: check discrepancy between sc.block.x and sc.block.y

audit.c.ri$t.sub <- NA

```

```{r survey-data-clean}

## Survey Data Cleaning ----

###Adding village ids (per sheet of projects used during auditing and field enumeration)
#To Do: use "village.sample" to match and add village.id, which is going to be the merge variable across all datasets

survey$village.id <- NA
survey <- survey[,c(1:4,79,5:78)] #Reording columns to put "village.id" next to "Village_Name"

#Loop for unique matches
for (i in 1:nrow(survey)){
  survey$village.id[i] <- ifelse((sum(survey$Village_Name[i]==village.sample$village.name)==1 & survey$Village_Name[i] %in% village.sample.no.dup$village.name),
                                  village.sample.no.dup$village.id[survey$Village_Name[i]==village.sample.no.dup$village.name],
                                  NA)
}

table(survey$village.id, useNA = "always") #384 entries remaining to be cleaned

survey$start <- as.character(survey$start)
substr(survey$start, 11, 11) <- " "
survey$survey.time <- strptime(survey$start, "%F %T")

#Loop for interpolated enumerator matches
for (i in 1:nrow(survey)){
  if (is.na(survey$village.id[i])){
    sub.earlier <- subset(survey, Enumerator_name==survey$Enumerator_name[i] & !is.na(village.id) & survey$survey.time<survey$survey.time[i])
    sub.earlier <- sub.earlier[rev(order(sub.earlier$survey.time)),]
    sub.later <- subset(survey, Enumerator_name==survey$Enumerator_name[i]  & !is.na(village.id) & survey$survey.time>survey$survey.time[i])
    sub.later <- sub.later[order(sub.later$survey.time),]
    
    survey$village.id[i] <- ifelse(sub.earlier$village.id[1]==sub.later$village.id[1], sub.earlier$village.id[1], NA)
  }
}

table(survey$village.id, useNA = "always") #365 entries remaining to be cleaned

unique(survey$Village_Name[is.na(survey$village.id)]) #These are all the village names that need to be cleaned manually

#Filling in the other village.id's manually using fuzzy matches with village.sample
survey$village.id <- ifelse(survey$Village_Name=="Rushaga", 86, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Ruboona", 12, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Nyambare", 77, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Nyakango kashekyera", 60, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Nyakabungo kashekyera", 60, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Nyakabungo bushara", 36, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Nyakabungo bushura", 36, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Nyakabungo ( Rubibwa )", 40, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Ndengo", 74, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Ndego", 74, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Mburameizi", 53, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kyteitokwa", 64, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kyoto rubanda", 59, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kyogo rubanda", 59, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kyogo ( mpungu )", 27, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kitojo", 67, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kigaga", 25, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kanyashogi", 17, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Kanyabuhama", 46, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Ishaya(Mucusye )", 49, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Ishaya (Mucusye )", 49, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Ishaya (Mucusye)", 49, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Ishaya ( Mucusye )", 49, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Buhomo", 10, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Buhoma", 10, survey$village.id)
survey$village.id <- ifelse(survey$Village_Name=="Bugolo", 48, survey$village.id)
#MB: it is always dangerous in code to use hanging indices as below, which will not work if something accidently gets sorted somewhere else.
survey$village.id[74] <- 12
survey$village.id[546] <- 40
survey$village.id[548] <- 40
survey$village.id[555] <- 40
survey$village.id[637] <- 36
survey$village.id[641] <- 36
survey$village.id[759] <- 27
survey$village.id[996] <- 59
survey$village.id[1169] <- 60
survey$village.id[1290] <- 74

##Kitahurira village surveys done by Wilber are for Kitahurira (Kashasha), because Wilber was primarily auditing in the Kabale district. 
##Kitahurira village surveys done by Kenneth are then Kitahurira (Hakikome). Filling in the village IDs:
for (i in 1468:1489) {
  survey$village.id[i] <- 70
}

for (i in 796:805) {
  survey$village.id[i] <- 20
}
survey$village.id[830] <- 20
survey$village.id[838] <- 20
survey$village.id[845] <- 20
survey$village.id[847] <- 20
survey$village.id[848] <- 20
survey$village.id[849] <- 20
survey$village.id[852] <- 20
survey$village.id[857] <- 20
survey$village.id[912] <- 20
survey$village.id[913] <- 20

table(survey$village.id, useNA = "always") #Confirmed: no entries without village id values

#Adding treatment assignment for accountability phase
p2.assigned <- p2.assigned[!is.na(p2.assigned$village.id),]
for (i in 1:nrow(survey)){
  survey$treat[i] <- ifelse(length(p2.assigned$treat2[p2.assigned$village.id==survey$village.id[i]])>0,
                             p2.assigned$treat2[p2.assigned$village.id==survey$village.id[i]], NA)
}
#This brings in the treatment assigned for the accountability phase, which should allow descriptive statistics to be computed

### Fill in incorrect values for age with village mean
survey$age_cleaned <- survey$age
survey$age_cleaned[275] <- NA
survey$age_cleaned[275] <- mean(survey[survey$village.id==46,"age_cleaned"], na.rm=TRUE)
survey$age_cleaned[1423] <- NA
survey$age_cleaned[1423] <- mean(survey[survey$village.id==80,"age_cleaned"], na.rm=TRUE)

## Compiling survey outcome data ----

# Create columns for the linear scales of each variable of interest
survey$subcounty <- NA
survey$listed_beneficiary_this_year_scale <- NA
survey$expect_benefit_this_year_scale <- NA
survey$satisfied_with_planning_scale <- NA
survey$satisfied_with_implementation_scale <- NA
survey$satisfied_with_BNP_management_scale <- NA
survey$satisfied_with_RS_scale <- NA
survey$important_to_conserve_scale <- NA
survey$benefits_valuable_scale <- NA
survey$sc.block <- NA
#To Do: automate the number of observations in the tables

# Create linear scales for each output variable of interest
survey$pic_shown <- NA
survey$willing_binary <- NA
survey$project_implemented_as_planned_binary <- NA

for (i in 1:nrow(survey))
{
  survey$pic_shown[i] <- ifelse(survey$pic1[i] == "" & survey$pic2[i] == "" & survey$pic3[i] == "", 0, 1)
  survey$willing_binary[i] <- ifelse(survey$The_last_question_is_more_of_a[i] == "willing", 1, 0)
  survey$project_implemented_as_planned_binary[i] <- ifelse(survey$project_implemented_as_planned[i] == "yes", 1, 0)
}

for (i in 1:nrow(survey))
{
  for (j in 1:nrow(p2.assigned))
  {
    survey$subcounty[i] <- ifelse(survey$village.id[i] == p2.assigned$village.id[j], p2.assigned$subcounty.orig[j], survey$subcounty[i])
    survey$sc.block[i] <- ifelse(survey$village.id[i] == p2.assigned$village.id[j], p2.assigned$sc.block[j], survey$sc.block[i])
  }
  
  survey$listed_beneficiary_this_year_scale[i] <- ifelse(survey$listed_beneficiary_this_year[i] == "yes", 1, 0)
  
  survey$expect_benefit_this_year_scale[i] <- ifelse(survey$expect_benefit_this_year[i] == "yes", 1, ifelse(survey$expect_benefit_this_year[i] == "no", 0, NA))
  
  survey$satisfied_with_planning_scale[i] <- ifelse(survey$satisfied_with_planning[i] == "very_dissatisf", 0, 
                                                    ifelse(survey$satisfied_with_planning[i] == "somewhat_dissa", 1,
                                                           ifelse(survey$satisfied_with_planning[i] == "somewhat_satis", 2,
                                                                  ifelse(survey$satisfied_with_planning[i] == "very_satisfied", 3, NA))))
  
  survey$satisfied_with_implementation_scale[i] <- ifelse(survey$satisfied_with_implementation[i] == "very_dissatisf", 0, 
                                                    ifelse(survey$satisfied_with_implementation[i] == "somewhat_dissa", 1,
                                                           ifelse(survey$satisfied_with_implementation[i] == "somewhat_satis", 2,
                                                                  ifelse(survey$satisfied_with_implementation[i] == "very_satisfied", 3, NA))))
  
  survey$satisfied_with_BNP_management_scale[i] <- ifelse(survey$satisfied_with_BNP_management[i] == "very_dissatisf", 0, 
                                                    ifelse(survey$satisfied_with_BNP_management[i] == "somewhat_dissa", 1,
                                                           ifelse(survey$satisfied_with_BNP_management[i] == "neutral", 2,
                                                           ifelse(survey$satisfied_with_BNP_management[i] == "somewhat_satis", 3,
                                                                  ifelse(survey$satisfied_with_BNP_management[i] == "very_satisfied", 4, NA)))))
  
  survey$satisfied_with_RS_scale[i] <- ifelse(survey$satisfied_with_RS[i] == "very_dissatisf", 0, 
                                                          ifelse(survey$satisfied_with_RS[i] == "somewhat_dissa", 1,
                                                                 ifelse(survey$satisfied_with_RS[i] == "neutral", 2,
                                                                        ifelse(survey$satisfied_with_RS[i] == "somewhat_satis", 3,
                                                                               ifelse(survey$satisfied_with_RS[i] == "very_satisfied", 4, NA)))))
  
  survey$important_to_conserve_scale[i] <- ifelse(survey$important_to_conserve[i] == "not_very_impor", 0, 
                                              ifelse(survey$important_to_conserve[i] == "somewhat_impor", 1,
                                                     ifelse(survey$important_to_conserve[i] == "very_important", 2, NA)))
  
  survey$benefits_valuable_scale[i] <- ifelse(survey$benefits_valuable[i] == "not_at_all_val", 0, 
                                                  ifelse(survey$benefits_valuable[i] == "not_very_valua", 1,
                                                         ifelse(survey$benefits_valuable[i] == "somewhat_valua", 2, 
                                                                ifelse(survey$benefits_valuable[i] == "very_valuable", 3, NA))))
}

survey <- subset(survey, !is.na(survey$treat))

p2.ri <- p2.ri[!is.na(p2.ri$village.id) & (p2.ri$village.id %in% survey$village.id),]
names(p2.ri)[5] <- "treat.ph1"
p2.ri <- p2.ri[-c(50,84,51),]

survey.ri <- merge(x=survey, y=p2.ri, by.x="village.id", by.y="village.id", all.x=TRUE, all.y=FALSE)
survey.ri <- survey.ri[!is.na(survey.ri$treat), ]
survey.ri$t.sub <- NA

```

## Table 1: Results of Revenue-Sharing Implementation from Physical and Resident Audits

```{r tab1-audit-results}

## Audit outcomes (top portion of table) ----

sub.c <- subset(audit.c.ri, shared==0)

sub.c$finished <- ifelse(sub.c$implementation_status=="finished", 1, 0)
sub.c$approved.imp <- ifelse(sub.c$project_part_approved=="approved" & sub.c$implementation_status!="not_start_exp" & sub.c$implementation_status!="not_start_not_", 1, 0)
sub.c$complete <- ifelse(sub.c$proportion_observable=="complete" | sub.c$proportion_observable=="mostly_comp", 1, 0)
sub.c$ver <- ifelse(sub.c$Verifiable=="verifiable", 1, 0)
sub.c$somewhat.ver <- ifelse(sub.c$Verifiable=="somewhat_ver", 1, 0)
sub.c$not.ver <- ifelse(sub.c$Verifiable=="not_ver", 1, 0)

###Descriptives and sampling variance

var <- c("Approved project implemented",
         "Non-dispersed project complete",
         "Number of dispersed project items shown (0-10)",
         "Project verifiable",
         "Project somewhat verifiable"
         )

trt <- c(mean(sub.c$approved.imp[sub.c$treat==1]),
         mean(sub.c$complete[sub.c$treat==1 & sub.c$dispersed_status=="not_dispersed"]),
         mean(sub.c$pics[sub.c$treat==1 & sub.c$dispersed_status=="dispersed"]),
         mean(sub.c$ver[sub.c$treat==1]),
         mean(sub.c$somewhat.ver[sub.c$treat==1])
         )

trt.se <- c(se.boot.cluster(vector=sub.c$approved.imp[sub.c$treat==1], clusters=sub.c$village_id[sub.c$treat==1]),
            se.boot.cluster(vector=sub.c$complete[sub.c$treat==1 & sub.c$dispersed_status=="not_dispersed"], clusters=sub.c$village_id[sub.c$treat==1 & sub.c$dispersed_status=="not_dispersed"]),
            se.boot.cluster(vector=sub.c$pics[sub.c$treat==1 & sub.c$dispersed_status=="dispersed"], clusters=sub.c$village_id[sub.c$treat==1 & sub.c$dispersed_status=="dispersed"]),
            se.boot.cluster(vector=sub.c$ver[sub.c$treat==1], clusters=sub.c$village_id[sub.c$treat==1]),
            se.boot.cluster(vector=sub.c$somewhat.ver[sub.c$treat==1], clusters=sub.c$village_id[sub.c$treat==1])
) 

ctl <- c(mean(sub.c$approved.imp[sub.c$treat==0]),
         mean(sub.c$complete[sub.c$treat==0 & sub.c$dispersed_status=="not_dispersed"]),
         mean(sub.c$pics[sub.c$treat==0 & sub.c$dispersed_status=="dispersed"]),
         mean(sub.c$ver[sub.c$treat==0]),
         mean(sub.c$somewhat.ver[sub.c$treat==0])
         )

ctl.se <- c(se.boot.cluster(vector=sub.c$approved.imp[sub.c$treat==0], clusters=sub.c$village_id[sub.c$treat==0]),
            se.boot.cluster(vector=sub.c$complete[sub.c$treat==0 & sub.c$dispersed_status=="not_dispersed"], clusters=sub.c$village_id[sub.c$treat==0 & sub.c$dispersed_status=="not_dispersed"]),
            se.boot.cluster(vector=sub.c$pics[sub.c$treat==0 & sub.c$dispersed_status=="dispersed"], clusters=sub.c$village_id[sub.c$treat==0 & sub.c$dispersed_status=="dispersed"]),
            se.boot.cluster(vector=sub.c$ver[sub.c$treat==0], clusters=sub.c$village_id[sub.c$treat==0]),
            se.boot.cluster(vector=sub.c$somewhat.ver[sub.c$treat==0], clusters=sub.c$village_id[sub.c$treat==0])
) 

diff <- trt - ctl

###Randomization inference

##Difference-in-means
dm.approved <- ri.diff(ri.obj = sub.c, outcome.var = "approved.imp", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 95, sims=simn)
dm.complete.con <- ri.diff(ri.obj = sub.c[sub.c$dispersed_status=="not_dispersed",], outcome.var = "complete", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 95, sims=simn)
dm.pics <- ri.diff(ri.obj = sub.c[sub.c$dispersed_status=="dispersed",], outcome.var = "pics", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 95, sims=simn) 
dm.ver <- ri.diff(ri.obj = sub.c, outcome.var = "ver", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 95, sims=simn)
dm.somewhat.ver <- ri.diff(ri.obj = sub.c, outcome.var = "somewhat.ver", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 95, sims=simn)

diff.null.se <- c(dm.approved$se.null, dm.complete.con$se.null, dm.pics$se.null, dm.ver$se.null, dm.somewhat.ver$se.null)

p <- c(dm.approved$p.one.way.greater, dm.complete.con$p.one.way.greater, dm.pics$p.one.way.greater, dm.ver$p.one.way.greater, dm.somewhat.ver$p.one.way.greater)

##FELM
felm.approved <- felm.ri(approved.imp ~ treat | sc.block.y | 0 | 0, dta=sub.c, treat.var = "treat", rand.ob = sub.c, rand.ob.info.cols = 95, sims=simn)
# felm.complete.con <- felm.ri(complete ~ treat | sc.block.y | 0 | 0, dta=sub.c[sub.c$dispersed_status=="not_dispersed",], treat.var = "treat", rand.ob = sub.c[sub.c$dispersed_status=="not_dispersed",], rand.ob.info.cols = 95, sims=simn)
felm.pics <- felm.ri(pics ~ treat | sc.block.y | 0 | 0, dta=sub.c[sub.c$dispersed_status=="dispersed",], treat.var = "treat", rand.ob = sub.c[sub.c$dispersed_status=="dispersed",], rand.ob.info.cols = 95, sims=simn)
felm.ver <- felm.ri(ver ~ treat | sc.block.y | 0 | 0, dta=sub.c, treat.var = "treat", rand.ob = sub.c, rand.ob.info.cols = 95, sims=simn)
felm.somewhat.ver <- felm.ri(somewhat.ver ~ treat | sc.block.y | 0 | 0, dta=sub.c, treat.var = "treat", rand.ob = sub.c, rand.ob.info.cols = 95, sims=simn)

felm.ate <- c(felm.approved$ate, -999, felm.pics$ate, felm.ver$ate, felm.somewhat.ver$ate)
felm.null.se <- c(felm.approved$se.null, -999, felm.pics$se.null, felm.ver$se.null, felm.somewhat.ver$se.null)
felm.p <- c(felm.approved$p.one.way.greater, -999, felm.pics$p.one.way.greater, felm.ver$p.one.way.greater, felm.somewhat.ver$p.one.way.greater)
felm.n <- c(felm.approved$N, -999, felm.pics$N, felm.ver$N, felm.somewhat.ver$N)


###Latex Table, R&P short article version

#rows
approved <- bind_cols(tab_cell_desc(desc=trt,desc.se=trt.se,i=1),
                      tab_cell_desc(desc=ctl,desc.se=ctl.se,i=1),
                      tab_cell(dm.approved),
                      tab_cell(felm.approved))
complete.con <- bind_cols(tab_cell_desc(desc=trt,desc.se=trt.se,i=2),
                          tab_cell_desc(desc=ctl,desc.se=ctl.se,i=2),
                          tab_cell(dm.complete.con),
                          empty_cell)
pics <- bind_cols(tab_cell_desc(desc=trt,desc.se=trt.se,i=3),
                  tab_cell_desc(desc=ctl,desc.se=ctl.se,i=3),
                  tab_cell(dm.pics),
                  tab_cell(felm.pics))
ver <- bind_cols(tab_cell_desc(desc=trt,desc.se=trt.se,i=4),
                 tab_cell_desc(desc=ctl,desc.se=ctl.se,i=4),
                 tab_cell(dm.ver),
                 tab_cell(felm.ver))
somewhat.ver <- bind_cols(tab_cell_desc(desc=trt,desc.se=trt.se,i=5),
                          tab_cell_desc(desc=ctl,desc.se=ctl.se,i=5),
                          tab_cell(dm.somewhat.ver),
                          tab_cell(felm.somewhat.ver))

tab_results <- bind_rows(approved,complete.con,pics,ver,somewhat.ver)
labs_rows <- tribble(~x,"Approved Project Implemented","","","Complete (Non-dispersed)","","","Pictures (Dispersed)","","","Fully Labeled","","","Partially Labeled","","")
tab_out <- bind_cols(labs_rows,tab_results)

tab.n <- tribble(~N, felm.approved$N, "", "",  18, "", "",  felm.pics$N, "", "",  felm.ver$N, "", "",  felm.somewhat.ver$N, "", "")
tab_out <- bind_cols(tab_out,tab.n) %>% rename(Outcome_Variable=x,
                                               Treatment=d,
                                               Control=d1,
                                               Difference_RI=h,
                                               FE_OLS_RI=h1)

stargazer(tab_out, type = "latex", summary = FALSE, rownames = FALSE)
#Wed, Jun 26, 2019 - 17:19:17


## Resident audit outcomes (bottom portion of table) ----

var <- c("Project Implemented",
         "Picture Shown"
)

trt <- c(mean(survey$project_implemented_as_planned_binary[survey$treat==1]),
         mean(survey$pic_shown[survey$treat==1])
)

trt.se <- c(se.boot.cluster(vector=survey$project_implemented_as_planned_binary[survey$treat==1],
                            clusters=survey$village.id[survey$treat==1]),
            se.boot.cluster(vector=survey$pic_shown[survey$treat==1],
                            clusters=survey$village.id[survey$treat==1])
)

ctl <- c(mean(survey$project_implemented_as_planned_binary[survey$treat==0]),
         mean(survey$pic_shown[survey$treat==0])
)

ctl.se <- c(se.boot.cluster(vector=survey$project_implemented_as_planned_binary[survey$treat==0],
                            clusters=survey$village.id[survey$treat==0]),
            se.boot.cluster(vector=survey$pic_shown[survey$treat==0],
                            clusters=survey$village.id[survey$treat==0])
)

diff <- trt - ctl

#diff
pictures <- ri.diff(ri.obj = survey.ri, outcome.var = "pic_shown", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 113, sims=simn)
project_implemented <- ri.diff(ri.obj = survey.ri, outcome.var = "project_implemented_as_planned_binary", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 113, sims=simn)

diff.null.se <- c(project_implemented$se.null, pictures$se.null)
p <- c(project_implemented$p.one.way.greater, pictures$p.one.way.greater)

#felm
pictures.felm <- felm.ri(pic_shown ~ treat + factor(gender) + age + factor(approximate_monthly_income) + factor(literacy) | sc.block.x | 0 | village.id, dta=survey.ri, treat.var = "treat", rand.ob = survey.ri, rand.ob.info.cols = 113, sims=simn)
project_implemented.felm <- felm.ri(project_implemented_as_planned_binary ~ treat + factor(gender) + age + factor(approximate_monthly_income) + factor(literacy) | sc.block.x | 0 | village.id, dta=survey.ri, treat.var = "treat", rand.ob = survey.ri, rand.ob.info.cols = 113, sims=simn)

felm.ate <- c(project_implemented.felm$ate, pictures.felm$ate)
felm.null.se <- c(project_implemented.felm$se.null, pictures.felm$se.null)
felm.p <- c(project_implemented.felm$p.one.way.greater, pictures.felm$p.one.way.greater)

###Latex Table, R&P short article version

#rows
impll <- bind_cols(tab_cell_desc(desc=trt,desc.se=trt.se,i=1),
                      tab_cell_desc(desc=ctl,desc.se=ctl.se,i=1),
                      tab_cell(project_implemented),
                      tab_cell(project_implemented.felm))
picc <- bind_cols(tab_cell_desc(desc=trt,desc.se=trt.se,i=2),
                      tab_cell_desc(desc=ctl,desc.se=ctl.se,i=2),
                      tab_cell(pictures),
                      tab_cell(pictures.felm))

tab_results <- bind_rows(impll,picc)
labs_rows <- tribble(~x,"Project Implemented","","","Picture Shown","","")
tab_out <- bind_cols(labs_rows,tab_results)

tab.n <- tribble(~N, project_implemented.felm$N, "", "", pictures.felm$N, "", "")

tab_out <- bind_cols(tab_out,tab.n) %>% rename(Outcome_Variable=x,
                                               Treatment=d,
                                               Control=d1,
                                               Difference_RI=h,
                                               FE_OLS_RI=h1)

stargazer(tab_out, type = "latex", summary = FALSE, rownames = FALSE)
#Date and time: Tue, Jul 02, 2019 - 15:26:30

```

## Table 2: Attitudes about Revenue Sharing from Resident Surveys

```{r tab2-resident-surveys}

var <- c("Satisfied with Implementation",
         "Satisfied with BNP Management",
         "Satisfied with RS",
         "Important to Conserve",
         "Benefits Valuable"
)

trt <- c(mean(survey$satisfied_with_implementation_scale[survey$treat==1], na.rm=TRUE),
         mean(survey$satisfied_with_BNP_management_scale[survey$treat==1], na.rm=TRUE),
         mean(survey$satisfied_with_RS_scale[survey$treat==1], na.rm=TRUE),
         mean(survey$important_to_conserve_scale[survey$treat==1], na.rm=TRUE),
         mean(survey$benefits_valuable_scale[survey$treat==1], na.rm=TRUE)
)

trt.se <- c(se.boot.cluster(vector=survey$satisfied_with_implementation_scale[survey$treat==1],
                            clusters=survey$village.id[survey$treat==1]),
            se.boot.cluster(vector=survey$satisfied_with_BNP_management_scale[survey$treat==1],
                            clusters=survey$village.id[survey$treat==1]),
            se.boot.cluster(vector=survey$satisfied_with_RS_scale[survey$treat==1],
                            clusters=survey$village.id[survey$treat==1]),
            se.boot.cluster(vector=survey$important_to_conserve_scale[survey$treat==1],
                            clusters=survey$village.id[survey$treat==1]),
            se.boot.cluster(vector=survey$benefits_valuable_scale[survey$treat==1],
                            clusters=survey$village.id[survey$treat==1])
)


ctl <- c(mean(survey$satisfied_with_implementation_scale[survey$treat==0], na.rm=TRUE),
         mean(survey$satisfied_with_BNP_management_scale[survey$treat==0], na.rm=TRUE),
         mean(survey$satisfied_with_RS_scale[survey$treat==0], na.rm=TRUE),
         mean(survey$important_to_conserve_scale[survey$treat==0], na.rm=TRUE),
         mean(survey$benefits_valuable_scale[survey$treat==0], na.rm=TRUE)
)

ctl.se <- c(se.boot.cluster(vector=survey$satisfied_with_implementation_scale[survey$treat==0],
                            clusters=survey$village.id[survey$treat==0]),
            se.boot.cluster(vector=survey$satisfied_with_BNP_management_scale[survey$treat==0],
                            clusters=survey$village.id[survey$treat==0]),
            se.boot.cluster(vector=survey$satisfied_with_RS_scale[survey$treat==0],
                            clusters=survey$village.id[survey$treat==0]),
            se.boot.cluster(vector=survey$important_to_conserve_scale[survey$treat==0],
                            clusters=survey$village.id[survey$treat==0]),
            se.boot.cluster(vector=survey$benefits_valuable_scale[survey$treat==0],
                            clusters=survey$village.id[survey$treat==0])
)

diff <- trt - ctl

###Randomization inference

##Difference-in-means
sat_implementation <- ri.diff(ri.obj = survey.ri, outcome.var = "satisfied_with_implementation_scale", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 113, sims=simn)
sat_BNP <- ri.diff(ri.obj = survey.ri, outcome.var = "satisfied_with_BNP_management_scale", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 113, sims=simn)
sat_RS <- ri.diff(ri.obj = survey.ri, outcome.var = "satisfied_with_RS_scale", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 113, sims=simn)
conserve <- ri.diff(ri.obj = survey.ri, outcome.var = "important_to_conserve_scale", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 113, sims=simn)
benefits_val <- ri.diff(ri.obj = survey.ri, outcome.var = "benefits_valuable_scale", treat.var = "treat", treat.ri.var = "t.sub", info.cols = 113, sims=simn)

diff.null.se <- c(sat_implementation$se.null, sat_BNP$se.null, sat_RS$se.null, conserve$se.null, benefits_val$se.null)
p <- c(sat_implementation$p.one.way.greater, sat_BNP$p.one.way.greater, sat_RS$p.one.way.greater, conserve$p.one.way.greater, benefits_val$p.one.way.greater)

##FELM
sat_implementation.felm <- felm.ri(satisfied_with_implementation_scale ~ treat + factor(gender) + age + factor(approximate_monthly_income) + factor(literacy) | sc.block.x | 0 | village.id, dta=survey.ri, treat.var = "treat", rand.ob = survey.ri, rand.ob.info.cols = 113, sims=simn)
sat_BNP.felm <- felm.ri(satisfied_with_BNP_management_scale ~ treat + factor(gender) + age + factor(approximate_monthly_income) + factor(literacy) | sc.block.x | 0 | village.id, dta=survey.ri, treat.var = "treat", rand.ob = survey.ri, rand.ob.info.cols = 113, sims=simn)
sat_RS.felm <- felm.ri(satisfied_with_RS_scale ~ treat + factor(gender) + age + factor(approximate_monthly_income) + factor(literacy) | sc.block.x | 0 | village.id, dta=survey.ri, treat.var = "treat", rand.ob = survey.ri, rand.ob.info.cols = 113, sims=simn)
conserve.felm <- felm.ri(important_to_conserve_scale ~ treat + factor(gender) + age + factor(approximate_monthly_income) + factor(literacy) | sc.block.x | 0 | village.id, dta=survey.ri, treat.var = "treat", rand.ob = survey.ri, rand.ob.info.cols = 113, sims=simn)
benefits_val.felm <- felm.ri(benefits_valuable_scale ~ treat + factor(gender) + age + factor(approximate_monthly_income) + factor(literacy) | sc.block.x | 0 | village.id, dta=survey.ri, treat.var = "treat", rand.ob = survey.ri, rand.ob.info.cols = 113, sims=simn)

felm.ate <- c(sat_implementation.felm$ate, sat_BNP.felm$ate, sat_RS.felm$ate, conserve.felm$ate, benefits_val.felm$ate)
felm.null.se <- c(sat_implementation.felm$se.null, sat_BNP.felm$se.null, sat_RS.felm$se.null, conserve.felm$se.null, benefits_val.felm$se.null)
felm.p <- c(sat_implementation.felm$p.one.way.greater, sat_BNP.felm$p.one.way.greater, sat_RS.felm$p.one.way.greater, conserve.felm$p.one.way.greater, benefits_val.felm$p.one.way.greater)

###Latex Table, JOP short article

#rows
sat_imp <- bind_cols(tab_cell_desc(desc=trt,desc.se=trt.se,i=1),
                     tab_cell_desc(desc=ctl,desc.se=ctl.se,i=1),
                     tab_cell(sat_implementation),
                     tab_cell(sat_implementation.felm))
sat_bnp <- bind_cols(tab_cell_desc(desc=trt,desc.se=trt.se,i=2),
                     tab_cell_desc(desc=ctl,desc.se=ctl.se,i=2),
                     tab_cell(sat_BNP),
                     tab_cell(sat_BNP.felm))
sat_rs <- bind_cols(tab_cell_desc(desc=trt,desc.se=trt.se,i=3),
                    tab_cell_desc(desc=ctl,desc.se=ctl.se,i=3),
                    tab_cell(sat_RS),
                    tab_cell(sat_RS.felm))
conservv <- bind_cols(tab_cell_desc(desc=trt,desc.se=trt.se,i=4),
                      tab_cell_desc(desc=ctl,desc.se=ctl.se,i=4),
                      tab_cell(conserve),
                      tab_cell(conserve.felm))
benefitss <- bind_cols(tab_cell_desc(desc=trt,desc.se=trt.se,i=5),
                       tab_cell_desc(desc=ctl,desc.se=ctl.se,i=5),
                       tab_cell(benefits_val),
                       tab_cell(benefits_val.felm))

tab_results <- bind_rows(sat_imp,sat_bnp,sat_rs,conservv,benefitss)
labs_rows <- tribble(~x,"Satisfied RS Implementation","","", "Satisfied Park Management","","",
                     "Satisfied Revenue Sharing","","", "Importance Conservation","","",
                     "RS Benefits Valuable","","")
tab_out <- bind_cols(labs_rows,tab_results)

tab.n <- tribble(~N, sat_implementation.felm$N, "", "", sat_BNP.felm$N, "", "",  sat_RS.felm$N, "", "",
                 conserve.felm$N, "", "", benefits_val.felm$N, "", "")

tab_out <- bind_cols(tab_out,tab.n) %>% rename(Outcome_Variable=x,
                                               Treatment=d,
                                               Control=d1,
                                               Difference_RI=h,
                                               FE_OLS_RI=h1)

stargazer(tab_out, type = "latex", summary = FALSE, rownames = FALSE)
# Date and time: Wed, Jul 03, 2019 - 09:08:05

```

## Table A1: Subject-wise descriptive statistics from surveys

```{r tab_a1-descriptives}

## Descriptive Statistics & Setup, Resident Surveys -----

survey$female <- ifelse(survey$gender=="female",1,0)
survey$income <- ifelse(survey$approximate_monthly_income=="100-200",150,
                        ifelse(survey$approximate_monthly_income=="20-100", 60, 
                               ifelse(survey$approximate_monthly_income=="200-500", 350,
                                      ifelse(survey$approximate_monthly_income=="500-1000", 750,
                                             ifelse(survey$approximate_monthly_income=="less_20", 10, NA)))))
survey$full_literacy <- ifelse(survey$literacy=="read_well", 1, 0)
survey$BIN_num <- ifelse(survey$BIN_received=="yes",1,0)
survey$chose_again_num <- ifelse(survey$chose_the_same_project_again=="yes",1,0)

## table including breakdown by experimental condition ----
survey$survey.time <- as.POSIXct(survey$survey.time) #dplyr throws error in table immediately below with this
var.names <- c("listed_beneficiary_this_year_scale","expect_benefit_this_year_scale","satisfied_with_planning_scale",
               "satisfied_with_implementation_scale","satisfied_with_BNP_management_scale","satisfied_with_RS_scale","important_to_conserve_scale",          
               "benefits_valuable_scale","chose_again_num","age_cleaned","female","income","full_literacy","BIN_num")

sum_tab_p <- survey %>% filter(!is.na(treat)) %>% 
  select(listed_beneficiary_this_year_scale,expect_benefit_this_year_scale,satisfied_with_planning_scale,
         satisfied_with_implementation_scale,satisfied_with_BNP_management_scale,satisfied_with_RS_scale,important_to_conserve_scale,          
         benefits_valuable_scale,chose_again_num,age_cleaned,female,income,full_literacy,BIN_num) %>% # select variables to summarise
  summarise_all(funs(qmin = min(.,na.rm = TRUE), 
                     qmedian = median(.,na.rm = TRUE), 
                     qmax = max(.,na.rm = TRUE),
                     qmean = mean(.,na.rm = TRUE), 
                     qsd = sd(., na.rm = TRUE),
                     qobs = sum(!is.na(.))))

tab_tidy_p <- sum_tab_p %>% gather(stat, val) %>%
  separate(stat, into = c("var", "stat"), sep = "_q") %>%
  spread(stat, val) %>%
  arrange(match(var, var.names)) %>%
  select(var, min, median, max, mean, sd, obs) %>%
  mutate(mean_out_p = paste(coef.print2(mean), se.print2(sd), sep=" ")) %>%
  select(var, mean_out_p, obs, min, median, max)

sum_tab_1 <- survey %>% filter(!is.na(treat) & treat==1) %>% 
  select(listed_beneficiary_this_year_scale,expect_benefit_this_year_scale,satisfied_with_planning_scale,
         satisfied_with_implementation_scale,satisfied_with_BNP_management_scale,satisfied_with_RS_scale,important_to_conserve_scale,          
         benefits_valuable_scale,chose_again_num,age_cleaned,female,income,full_literacy,BIN_num) %>% # select variables to summarise
  summarise_all(funs(qmin = min(.,na.rm = TRUE), 
                     qmedian = median(.,na.rm = TRUE), 
                     qmax = max(.,na.rm = TRUE),
                     qmean = mean(.,na.rm = TRUE), 
                     qsd = sd(., na.rm = TRUE)))
#to do: gather the summaries, rerun with group_by(treat)

tab_tidy_1 <- sum_tab_1 %>% gather(stat, val) %>%
  separate(stat, into = c("var", "stat"), sep = "_q") %>%
  spread(stat, val) %>%
  arrange(match(var, var.names)) %>%
  select(min, median, max, mean, sd) %>%
  mutate(mean_out_1 = paste(coef.print2(mean), se.print2(sd), sep=" ")) %>%
  select(mean_out_1)

sum_tab_0 <- survey %>% filter(!is.na(treat) & treat==0) %>% 
  select(listed_beneficiary_this_year_scale,expect_benefit_this_year_scale,satisfied_with_planning_scale,
         satisfied_with_implementation_scale,satisfied_with_BNP_management_scale,satisfied_with_RS_scale,important_to_conserve_scale,          
         benefits_valuable_scale,chose_again_num,age_cleaned,female,income,full_literacy,BIN_num) %>% # select variables to summarise
  summarise_all(funs(qmin = min(.,na.rm = TRUE), 
                     qmedian = median(.,na.rm = TRUE), 
                     qmax = max(.,na.rm = TRUE),
                     qmean = mean(.,na.rm = TRUE), 
                     qsd = sd(., na.rm = TRUE)))
#to do: gather the summaries, rerun with group_by(treat)

tab_tidy_0 <- sum_tab_0 %>% gather(stat, val) %>%
  separate(stat, into = c("var", "stat"), sep = "_q") %>%
  spread(stat, val) %>%
  arrange(match(var, var.names)) %>%
  select(min, median, max, mean, sd) %>%
  mutate(mean_out_0 = paste(coef.print2(mean), se.print2(sd), sep=" ")) %>%
  select(mean_out_0)

covariate.labels = tribble(~var,"Listed Beneficiary","Expect to Benefit","Satisfied with Planning","Satisfied with Implementation",
                     "Satisfied with BNP Management","Satisfied with Revenue Sharing","Importance of Conserving BNP",
                     "Revenue Sharing Valuable","Choose Project Again",
                     "Age","Female","Monthly Income (UGX)","Fully Literate","Bwindi Info Network Member")

comb_tab <- bind_cols(covariate.labels,tab_tidy_p,tab_tidy_1,tab_tidy_0) %>% select(-c(var1,median))
colnames(comb_tab) <- c("Variable","Pooled", "Obs.", "Min", "Max", "Treated", "Control")

stargazer(comb_tab, type = "latex", summary = FALSE, rownames = FALSE)
# Date and time: Thu, Oct 17, 2019 - 15:02:23

```

## Table E1: Shared Status of Village Project Components by Experimental Condition

```{r tab_e1-shared_status}

table(audit.c$treat,audit.c$shared)
prop.table(table(audit.c$treat,audit.c$shared),1)
chisq.test(table(audit.c$treat,audit.c$shared))

```

## Table E2: Implementation of project components recorded by field audits, considering spillover

```{r tab_e2-spillover}

v_ids.ordered <- vlist.ordered.dta$accountability.id[!is.na(vlist.ordered.dta$accountability.id)]

#sort(v_ids.ordered) #all randomized villages accounted for 

con.mat <- matrix(0, nrow=93,ncol=93)
row.names(con.mat) <- v_ids.ordered
colnames(con.mat) <- v_ids.ordered
delta <- row(con.mat) - col(con.mat)

con.mat[abs(delta)==1 | abs(delta)==92] <- 1 #changing all directly contiguous to value == 1

#These are objects for further robustness check if necessary, per PAP
con2.mat <- con.mat
con2.mat[abs(delta)>0 & (abs(delta)<=2 | abs(delta)>=91)] <- 1 #changing all contiguous w/in two villages to value == 1

con3.mat <- con.mat
con3.mat[abs(delta)>0 & (abs(delta)<=3 | abs(delta)>=90)] <- 1 #changing all contiguous w/in two villages to value == 1

treated.villages <- p2.assigned$village.id[p2.assigned$treat2==1]

#Modified from WD replication file, Participation Phase
for (i in 1:nrow(audit.c.ri)){
  row.test <- con.mat[audit.c.ri$village_id[i]==row.names(con.mat),]==1
  row.test2 <- con2.mat[audit.c.ri$village_id[i]==row.names(con2.mat),]==1
  row.test3 <- con3.mat[audit.c.ri$village_id[i]==row.names(con3.mat),]==1
  
  con.villages <- row.names(con.mat)[row.test]
  con2.villages <- row.names(con2.mat)[row.test2]
  con3.villages <- row.names(con3.mat)[row.test3]
  
  audit.c.ri$S1[i] <- sum(con.villages %in% treated.villages)
  audit.c.ri$S2[i] <- sum(con2.villages %in% treated.villages)
  audit.c.ri$S3[i] <- sum(con3.villages %in% treated.villages)
}

audit.c.ri$S1 <- as.factor(audit.c.ri$S1)
audit.c.ri$S2 <- as.factor(audit.c.ri$S2)
audit.c.ri$S3 <- as.factor(audit.c.ri$S3)

#Remaking sub.c object from above
sub.c <- subset(audit.c.ri, shared==0)
sub.c$finished <- ifelse(sub.c$implementation_status=="finished", 1, 0)
sub.c$approved.imp <- ifelse(sub.c$project_part_approved=="approved" & sub.c$implementation_status!="not_start_exp" & sub.c$implementation_status!="not_start_not_", 1, 0)
sub.c$complete <- ifelse(sub.c$proportion_observable=="complete" | sub.c$proportion_observable=="mostly_comp", 1, 0)
sub.c$ver <- ifelse(sub.c$Verifiable=="verifiable", 1, 0)
sub.c$somewhat.ver <- ifelse(sub.c$Verifiable=="somewhat_ver", 1, 0)
sub.c$not.ver <- ifelse(sub.c$Verifiable=="not_ver", 1, 0)

#Models using lm_robust(), S1
app <- lm_robust(approved.imp ~ treat*S1, data=sub.c, clusters = sub.c$village.id, fixed_effects = sub.c$sc.block.y)
comp <- lm_robust(complete ~ treat*S1, data=sub.c[sub.c$dispersed_status=="not_dispersed",], 
                  clusters = sub.c$village.id) #block fixed-effect not possible with spillover indicator, dropping
pics <- lm_robust(pics ~ treat*S1, data=sub.c[sub.c$dispersed_status=="dispersed",], 
                  clusters = sub.c$village.id, fixed_effects = sub.c[sub.c$dispersed_status=="dispersed",]$sc.block.y)
ver <- lm_robust(ver ~ treat*S1, data=sub.c, clusters = sub.c$village.id, fixed_effects = sub.c$sc.block.y)
somewhat_ver <- lm_robust(somewhat.ver ~ treat*S1, data=sub.c, clusters = sub.c$village.id, fixed_effects = sub.c$sc.block.y)
#result: none of the models show moderation of treatment effect by spillover

#Tricking stargazer() to produce table with estimatr() output.
#Instructions at: https://declaredesign.org/r/estimatr/articles/regression-tables.html
app <- lm(approved.imp ~ treat*S1 + sc.block.y, data=sub.c)
comp <- lm(complete ~ treat*S1, data=sub.c[sub.c$dispersed_status=="not_dispersed",])
pics <- lm(pics ~ treat*S1 + sc.block.y, data=sub.c[sub.c$dispersed_status=="dispersed",])
ver <- lm(ver ~ treat*S1 + sc.block.y, data=sub.c)
somewhat_ver <- lm(somewhat.ver ~ treat*S1 + sc.block.y, data=sub.c)

stargazer(app,comp,pics,ver,somewhat_ver, type="latex",
          keep = c("treat1","S11","S12","treat1:S11","treat1:S12"),
          dep.var.caption  = NULL,
          dep.var.labels = c("Approved Project Implemented","Complete (Non-dispersed)","Pictures (Dispersed)","Fully Labeled","Partially Labeled"),
          covariate.labels = c("Treated","S1","S2","Treated X S1","Treated X S2"),
          se = starprep(app,comp,pics,ver,somewhat_ver, clusters = sub.c$village.id),
          ci.custom = starprep(app,comp,pics,ver,somewhat_ver, stat = "ci"),
          report = c('vcs'), df = FALSE, omit.stat = c("rsq","ser","F"),
          model.numbers = FALSE,
          notes.append = FALSE, notes.label = "", notes = ""
) #Date and time: Wed, Mar 13, 2019 - 13:45:43, some post-formatting necessary

```

## Table F1: Heterogeneous Effects on Survey Outcomes Based on Subscriber Status

```{r tab_f1-hte}

#Models using lm_robust()
sat_implementation <- lm_robust(satisfied_with_implementation_scale ~ treat*BIN_received + gender + age + approximate_monthly_income + literacy + sc.block.x, clusters = survey.ri$village.id, data=survey.ri)
ci_out_imp <- cbind(sat_implementation$conf.low,sat_implementation$conf.high)

sat_BNP <- lm_robust(satisfied_with_BNP_management_scale ~ treat*BIN_received + gender + age + approximate_monthly_income + literacy + sc.block.x, clusters = survey.ri$village.id, data=survey.ri)
ci_out_BNP <- cbind(sat_BNP$conf.low,sat_BNP$conf.high)

sat_RS <- lm_robust(satisfied_with_RS_scale ~ treat*BIN_received + gender + age + approximate_monthly_income + literacy + sc.block.x, clusters = survey.ri$village.id, data=survey.ri)
ci_out_RS <- cbind(sat_RS$conf.low,sat_RS$conf.high)

conserve <- lm_robust(important_to_conserve_scale ~ treat*BIN_received + gender + age + approximate_monthly_income + literacy + sc.block.x, clusters = survey.ri$village.id, data=survey.ri)
ci_out_con <- cbind(conserve$conf.low,conserve$conf.high)

benefits_val <- lm_robust(benefits_valuable_scale ~ treat*BIN_received + gender + age + approximate_monthly_income + literacy + sc.block.x, clusters = survey.ri$village.id, data=survey.ri)
ci_out_ben <- cbind(benefits_val$conf.low,benefits_val$conf.high)
#result: none of the models show HTE by BIN subscriber status

#lm() model objects for stargazer
sat_implementation <- lm(satisfied_with_implementation_scale ~ treat*BIN_received + gender + age + approximate_monthly_income + literacy + sc.block.x, data=survey.ri)
sat_BNP <- lm(satisfied_with_BNP_management_scale ~ treat*BIN_received + gender + age + approximate_monthly_income + literacy + sc.block.x, data=survey.ri)
sat_RS <- lm(satisfied_with_RS_scale ~ treat*BIN_received + gender + age + approximate_monthly_income + literacy + sc.block.x, data=survey.ri)
conserve <- lm(important_to_conserve_scale ~ treat*BIN_received + gender + age + approximate_monthly_income + literacy + sc.block.x, data=survey.ri)
benefits_val <- lm(benefits_valuable_scale ~ treat*BIN_received + gender + age + approximate_monthly_income + literacy + sc.block.x, data=survey.ri)


stargazer(sat_implementation, sat_BNP, sat_RS, conserve, benefits_val, type = "latex",
          keep = c("treat","BIN_receivedyes","treat:BIN_receivedyes"),
          dep.var.caption  = NULL,
          dep.var.labels = c("Satisfied RS Implementation",
                             "Satisfied Park Management",
                             "Satisfied Revenue Sharing",
                             "Importance Conservation",
                             "RS Benefits Valuable"),
          covariate.labels = c("Treated","Subscriber","Treated X Subscriber"),
          ci = TRUE,
          ci.custom = list(ci_out_imp,ci_out_BNP,ci_out_RS,ci_out_con,ci_out_ben),
          report = c('vcs'), df = FALSE, omit.stat = c("rsq","ser","F"),
          model.numbers = FALSE,
          notes.append = FALSE, notes.label = "", notes = ""
) #Date and time: Thu, Oct 10, 2019 - 15:10:34, some post-formatting necessary

```
